/*
************************************************************************************************************************
	Creates estimation dataset $workdata/hoursofwork with information on:
	- monthly employment: wage
	- monthly hours of work: heltid_deltid

	Uses auxiliary datasets:
	- $temp/BEF/bef_red2_`year' (see "$data_do/bef_red")
	- $temp/mia_pnr_`year'      (see "$data_do/mia_con")

	Registers: Variables (Years) 
	- MIA: pnr (2000-2006)
	- CONESR: pnr, senr, arbnr, ansfra, anstil, helarkod, lonblb (2000-2005)
	- RAS: pnr, senr, arbnr, ansfra, anstil, helarkod, loenblb (2006)
	- RAS: pnr, arbnr, heltid_deltid_kode, loenblb (2000-2006)
	- DREAM: pnr, y_01-y_52(53) (2000-2006)

************************************************************************************************************************
*/

***************************************************************
* collect observations on hours of work (heltid_deltid_kode).
* Save temporarily in `ras`year''
***************************************************************

forvalues year=2000/2006 {

* Temporary file that holds this year's RAS records with the hours-of-work code.
tempfile ras`year'
use pnr arbnr`year' heltid_deltid_kode`year' loenblb`year' using "$rawdata/ras`year'", clear

* Strip the year suffix; earnings are renamed to lonblb, the name used as merge key further down.
rename arbnr`year' arbnr
rename heltid_deltid_kode`year' heltid_deltid_kode
rename loenblb`year' lonblb

* Discard records without a workplace id and records with zero earnings.
drop if arbnr==""
drop if lonblb==0

* Keep only (pnr, arbnr, lonblb) combinations that occur exactly once,
* so that the combination identifies a single record.
bysort pnr arbnr lonblb: keep if _N==1

* Constant counter variable; it is stored in the tempfile along with the data.
g e=1
save `ras`year''

*******************************************
* Merge mia_pnr_`year' (see $data_do/mia_con.do) with conesr`year' (or ras`year' if year==2006) for each individual in each firm. 
*******************************************

* Load the employment spells of the year: CONESR before 2006, RAS (year-suffixed
* variable names, renamed to the CONESR names) from 2006 onwards.
if `year'<2006  use pnr senr arbnr ansfra anstil helarkod lonblb  using "$rawdata/conesr`year'", clear
if `year'>=2006 use pnr senr`year' arbnr`year' ansfra`year' anstil`year' helarkod`year' loenblb`year'  using "$rawdata/ras`year'", clear
if `year'>=2006 rename (senr`year' arbnr`year' ansfra`year' anstil`year' helarkod`year' loenblb`year' ) (senr arbnr ansfra anstil helarkod lonblb )

* Drop records whose employer id is the string "." (a missing value that was stored as text).
drop if senr=="."

* Spell start: the last two characters of ansfra are read as the day, everything
* before them as the month.
* NOTE(review): presumably ansfra and anstil hold month and day as MDD or MMDD digits - confirm.
if `year'>=2006 tostring ansfra, replace
capture drop con_fra_month
g con_fra_month=substr(ansfra,1,length(ansfra)-2)
destring con_fra_month, replace
g con_fra_day=substr(ansfra,length(ansfra)-1,2)
destring con_fra_day, replace

* Spell end: same parsing applied to anstil.
if `year'>=2006 tostring anstil, replace
capture drop con_til_month
g con_til_month=substr(anstil,1,length(anstil)-2)
destring con_til_month, replace
* The day position must be computed from the length of anstil itself. Using the
* length of ansfra gave a wrong end day whenever the two strings differ in length.
g con_til_day=substr(anstil,length(anstil)-1,2)
destring con_til_day, replace

* Attach the MIA records by person and employer. Unmatched spell records are
* dropped, unmatched MIA records are kept (keep(match using)).
capture drop _merge
merge m:1 pnr senr using "$temp/mia_pnr_`year'" , keep(match using)

*******************************************
* Define number of observation per individual per firm.
*******************************************

* e is a constant 1, so the running sum of e within a group is the position of
* the row inside that group.
capture drop e
g e=1
capture drop pnrse_num
* Rows are numbered within (pnr, senr) in ascending order of earnings (lonblb).
sort pnr senr lonblb
by pnr senr: g pnrse_num=sum(e)

*******************************************
* Define sum of earnings per individual per firm.
*******************************************

capture drop lonsum_se
sort pnr senr pnrse_num
* Running (cumulative) sum of lonblb in pnrse_num order; the row with the
* highest pnrse_num therefore carries the total for the (pnr, senr) group.
by pnr senr: g lonsum_se=sum(lonblb)

*******************************************
* Detect and delete duplicates.
*******************************************

* Number the rows again in reverse order: pnrse_num2 equals 1 on the row that
* had the highest pnrse_num, i.e. the row holding the group total in lonsum_se.
gsort pnr senr -pnrse_num
capture drop pnrse_num2
by pnr senr: g pnrse_num2=sum(e)

* same counts the rows that share one (pnr, lonblb, arbnr) combination.
* Rows with missing earnings and rows whose combination is not unique are
* removed, which makes (pnr, lonblb, arbnr) usable as the key of the 1:1 merge
* that follows.
* NOTE(review): this filter can also remove the row with pnrse_num2 equal to 1
* of a group; such a group is later dropped entirely - confirm this is intended.
bysort pnr lonblb arbnr: egen same=sum(e)
keep if lonblb!=.
keep if same==1
drop same

***************************************************************
* Merge information on hours of work from temporary file `ras`year''.
* Save $temp/hoursofwork_mia_con_`year'
***************************************************************

* Attach heltid_deltid_kode from the RAS tempfile. All master rows are kept;
* rows found only in the tempfile are discarded (keep(master match)).
capture drop _merge
merge 1:1 pnr lonblb arbnr using `ras`year'', keep(master match) 
capture drop _merge

* pnrse becomes a running group counter: it starts as an indicator of the rows
* with pnrse_num2 equal to 1 and is then replaced by its running sum in
* (pnr, senr, pnrse_num2) order, so it increases by one at each such row.
* NOTE(review): rows of a group without a pnrse_num2 equal to 1 row get the
* counter value of the preceding group - confirm that this cannot occur.
g pnrse=pnrse_num2==1
sort pnr senr pnrse_num2
replace pnrse=sum(pnrse)

* Start from the hours code of the first row of each group.
capture drop heltid_deltid_h  
g heltid_deltid_h=heltid_deltid_kode if pnrse_num2==1

***************************************************************
* Distribute information on hours of work across multiple observations for same individual in same firm. 
***************************************************************

* For offsets 1 up to (largest pnrse_num2 minus 1): where heltid_deltid_h is
* still missing, copy heltid_deltid_kode from the row that many positions
* further down, provided that row has the same pnrse value.
* NOTE(review): heltid_deltid_h is not referenced again in the visible part of
* this file; the monthly h_ variables are built from heltid_deltid_kode -
* confirm which of the two is intended there.
sum pnrse_num2
local pnr_max=r(max)
forvalues p=2/`pnr_max' {
local p2=`p'-1
replace heltid_deltid_h=heltid_deltid_kode[_n+`p2'] if heltid_deltid_h==. & pnrse==pnrse[_n+`p2']
}

*******************************************
* Keep one observation per individual per firm.
*******************************************
* The retained row is the one numbered 1 in the reverse numbering.
keep if pnrse_num2==1             

capture drop pnrse_num pnrse_num2

*******************************************
* Generate variable h_1-h_12 with hours of work in months with employment.
*******************************************

* h_1-h_12: product of the monthly variable m_ and the hours code; left
* missing in months where m_ is missing.
forvalues mm =1/12 {
g h_`mm'=m_`mm'*heltid_deltid_kode if m_`mm'!=.
}

*******************************************
* Generate variable h2_1-h2_12 with hours of work in months with employment.
* Distribute information on hours of work across multiple observations for same individual. 
*******************************************

forvalues month =1/12 {
capture drop h2_`month' 
capture drop pnr_num
* Order the rows of each person by descending l_ of the month (l_ comes from
* the merged MIA data and is not defined in this file) and number them.
gsort pnr -l_`month'
by pnr: g pnr_num=sum(e)
* Start from the h_ value of the first row of each person.
g h2_`month'=h_`month' if pnr_num==1

* For offsets 1 up to (largest pnr_num minus 1): where h2_ is still missing,
* copy h_ from the row that many positions further down if it belongs to the
* same person. The first row of a person thus receives the first non-missing
* h_ value found among the rows below it.
sum pnr_num
local pnr_num_max=r(max)
forvalues p=2/`pnr_num_max' {
local p2=`p'-1
replace h2_`month'=h_`month'[_n+`p2'] if h2_`month'==. & pnr==pnr[_n+`p2']
* NOTE(review): no variable h_month_h is created in the visible code; this
* drop looks like a leftover.
capture drop h_month_h 
}
}

*******************************************
* Keep one observation per individual.
* Save hoursofwork_mia_con_`year'.
*******************************************

* pnr_num at this point stems from the last pass of the loop above (month 12),
* so the retained row is the first row in the month 12 ordering.
keep if pnr_num==1

capture drop _merge
save "$temp/hoursofwork_mia_con_`year'", replace
capture drop _merge
}
*********************************************************************************************

***************************************************************
* Use data with population characteristics
***************************************************************


forvalues year = 2000/2006 {

* day1 is the reference date of the year: week k is later dated day1 + 7*k.
* Each of the dates below is a Monday on or next to 1 January.
* NOTE(review): presumably the Monday that opens week 1 of the DREAM data - confirm.
if `year'==2000 local day1=mdy(1,3,2000)
if `year'==2001 local day1=mdy(1,1,2001)
if `year'==2002 local day1=mdy(12,31,2001)
if `year'==2003 local day1=mdy(12,30,2002)
if `year'==2004 local day1=mdy(12,29,2003)
if `year'==2005 local day1=mdy(1,3,2005)
if `year'==2006 local day1=mdy(1,2,2006)


use "$temp/BEF/bef_red2_`year'", clear

***************************************************************
* Limit sample to include only those with basic education (edul=="10") and ages 19-35 at start of year.
* (to limit computations)
***************************************************************

keep if alderl>=19 & alderl<=35
keep if edul=="10" 

***************************************************************
* Merge with data with hours of work (hoursofwork_mia_con_`year').
***************************************************************

* The guard tests the loop macro. Writing a bare year here would make the if
* command read the dataset variable year in the first observation only, which
* is missing (and the merge silently skipped) when the sample is empty.
capture drop _merge
if `year' <  2008 merge 1:m pnr using "$temp/hoursofwork_mia_con_`year'" ,  keep(master match)


***************************************************************
* Merge with data with weekly public income transfers y_1-y_52(y_53).
***************************************************************

* Attach the weekly transfer codes; every row in memory is kept, persons
* found only in the DREAM file are not added.
capture drop _merge
merge m:1 pnr using "$rawdata/dream`year'", keep(master match)

* The number of weeks in the year equals the number of y_ variables.
unab dream_vars : y_*
local num_weeks : word count `dream_vars'
display `num_weeks'

* Remove the leading zero of the first nine weeks (y_01 becomes y_1, and so on).
forvalues wk= 1/9 {
rename y_0`wk' y_`wk'
}

***************************************************************
* Calculate weekly age for each week of year.
***************************************************************

* Age in years on the date of the week (day1 plus seven days per week).
forvalues wk = 1/`num_weeks' {
local wk_date = `day1'+(7*(`wk'))
g age_`wk'= ((`wk_date'-foed_dag))/365.25
}

***************************************************************
* Create weekly variables of employment and hours of work.
***************************************************************

forvalues week = 1/`num_weeks'  {

* Weekly employment (w_), hours (s_) and calendar month (month_), all zero by default.
g w_`week'=0
g s_`week'=0
g month_`week'=0

* Date of the week: day1 plus seven days per week, the same date at which
* age_ of the week is measured.
local week_date = `day1' + 7*`week'

* The month of a week depends on the week date only, so it is derived from
* the date directly. The former test compared age_ of the week with ages
* computed from foed_dag inside an if command; an if command evaluates
* variables in the first observation only, so a missing birth date there, or
* float rounding of age_ on the first day of a month, assigned wrong months
* to every person.
* Weeks dated outside the calendar year keep the zero defaults, as before.
if year(`week_date') == `year' {
local week_month = month(`week_date')
if `year' < 2008 {
replace w_`week'=m3_`week_month'
replace s_`week'=h2_`week_month'
}
replace month_`week'=`week_month'
}
}


***************************************************************
* Create weekly observations of employment (w), hours of work (s), and public income transfer (y) in weeks 1-`num_weeks'.
* (wide to long)
* Save hoursofwork_2_`year'
***************************************************************

* One row per person and week: every row is copied num_weeks times and the
* copies of a person are numbered 1 to num_weeks.
expand `num_weeks'
capture drop week
bysort pnr: gen week=_n

* Long-format targets. The capture drops remove same-named variables that may
* have come in with the merged data.
capture drop age
gen age=0
capture drop month
gen month=0
capture drop y
gen y=0
capture drop w
gen w=0
capture drop s
g s=.

* Copy the value of week k from the wide variables into the row with week==k.
* The replaces need no by prefix: the condition selects the rows on its own,
* and the data are already sorted by pnr, so repeating bysort on every
* statement only added overhead.
forvalues week=1/`num_weeks' {
replace age=age_`week' if week==`week'
replace y=y_`week' if week==`week'
replace w=w_`week' if week==`week'
replace s=s_`week' if week==`week'
replace month=month_`week' if week==`week'
}

* Remove the wide variables that are no longer needed.
capture drop m_* 
capture drop m3_*
capture drop l_* 
capture drop l2_* 
capture drop s_* 
capture drop h2_* 
forvalues d=1/12 {
capture drop d_`d'
}
 
drop y_* age_* month_* w_*   _merge
save "$temp/hoursofwork_2_`year'", replace
}

********************************************
* Applying baseline selection criteria.
********************************************

forvalues year = 2000/2006 {
use "$temp/hoursofwork_2_`year'", clear

* Baseline sample: all of the conditions below must hold.
keep if ie_type==1 & fm_markl==6 & fm_markf==6 & d_childl==0 & d_childf==0
keep if edul=="10" & eduf=="10" & civstl=="U" & civstf=="U"


********************************************
* Defining weekly ordinary employment (wage) and hours of work, and social assistance (sa).
* Save hoursofwork_3_`year'
********************************************

* wage is 1 in weeks with positive employment (w) ...
capture drop wage
g wage=(w>0 & w!=.)

* ... unless the transfer code of the week is a study grant ...
replace wage=0 if inlist(y,651,652)

* ... or a wage subsidy program code, or 997 (deceased).
replace wage=0 if inlist(y,135,136,145,146,215,216)                         ///
                | inlist(y,705,706,715,716,725,726,735,736)                 ///
                | inlist(y,745,746,755,756,765,766,815,816,875,876,895,896) ///
                | y==997

* sa is 1 in weeks with a social assistance transfer code.
capture drop sa
g sa=( inrange(y,130,139) | inrange(y,140,149) ///
     | inrange(y,700,709) | inrange(y,710,719) ///
     | inrange(y,720,729) | inrange(y,730,739) )



save "$temp/hoursofwork_3_`year'", replace
}

***************************************************
*Collect yearly dataset to one.
*Define age in months
*Save hoursofwork
***************************************************

* Variables carried into the final dataset; the same list is read from every yearly file.
* NOTE(review): heltid_deltid is not created anywhere in the visible code, so it
* must already exist in one of the merged input datasets - confirm.
local keepvars pnr foed_dag age week month year y sa wage heltid_deltid heltid_deltid_kode ans_mia_md2 koen edul eduf d_childl d_childf ie_type

* Start from 2000 and stack the remaining years underneath.
use `keepvars' using "$temp/hoursofwork_3_2000", clear
forvalues yr=2001/2006 {
append using "$temp/hoursofwork_3_`yr'", keep(`keepvars')
}
save "$workdata/hoursofwork", replace